# -*-coding:utf-8-*-
# __author__ = 'PP'

import string
import traceback
from twisted.enterprise import adbapi
from scrapy import Selector
from scrapy import Request
from scrapy.utils.project import get_project_settings
from scrapy import log
from goods.items import GoodsItem
from scrapy.contrib.spiders import CrawlSpider, Rule
from scrapy.contrib.linkextractors.sgml import SgmlLinkExtractor


class CpuSpider(CrawlSpider):
    """Crawl spider for CPU product pages on www.youcpu.com.

    Crawling starts at the site's archive page. Links pointing at
    ``/zh-cn/cpu/<product>`` are extracted, passed through
    :meth:`processRequest` and their responses handed to :meth:`parseIndex`.

    The crawl configuration lives in class attributes (the layout Scrapy
    documents for ``CrawlSpider``) so that the inherited constructor runs
    and prepares ``rules``; a custom ``__init__`` that skips the base-class
    constructor leaves the rules uncompiled.
    """

    name = 'cpu'

    baseUrl = 'http://www.youcpu.com/'
    allowed_domains = ['www.youcpu.com']
    start_urls = ['http://www.youcpu.com/zh-cn/archives']

    # Example of a product page:
    # http://www.youcpu.com/zh-cn/cpu/Intel-Core-i7-3770K-8M-Cache-up-to-3.90-GHz
    rules = [
        Rule(
            # ``allow`` takes regular expressions, not globs: require at
            # least one character after ``/zh-cn/cpu/``.
            SgmlLinkExtractor(allow=(r'/zh-cn/cpu/.+',)),
            callback='parseIndex',
            process_request='processRequest',
        ),  # product home page
    ]

    def parseIndex(self, response):
        """Handle a product page response.

        Currently only prints the response object; no items are produced.

        :param response: the response downloaded for a product page link.
        """
        print(response)

    def processRequest(self, req):
        """Log every request extracted by the product rule and let it through.

        :param req: the request built from an extracted link.
        :returns: the same request, unchanged. Returning ``None`` here would
            filter the request out, so no product page would ever be fetched.
        """
        log.msg('检查:' + req.url)
        return req


